
# V - Build relevant dt, for all three samples ------------------------------------

# Build the tables used for the exhibits of one sample.
#
# Steps:
#   1. Copy the global table `test_dt_withProb_<sample_name>` and verify that
#      (id_var, S_index_date_XX, S_sample_source_XX) identifies a row uniquely.
#   2. Load the detailed cost data (sourcing `get_cost_data_cancer.R` or
#      `get_cost_data_all.R` from `parent_directory`) and left-join it onto
#      the key columns.
#   3. Re-aggregate the detailed cost and stop if it differs by more than 1
#      from the totals already stored in the sample table.
#   4. Aggregate the cost data per key / death info / main_cat / dates and
#      left-join it onto the sample's keys.
#   5. Add the derived exhibit columns via get_test_data().
#
# Globals read: `test_dt_withProb_<sample_name>`, `test_dt_withProb_all`,
# `parent_directory`, `ret_4th_table`.
#
# Args:
#   sample_name: Single string; suffix of the global `test_dt_withProb_*`
#                table to use. "cancer" selects the cancer cost data, any
#                other value the "all" cost data.
#   cost:        If TRUE (default) the cost tables are built and returned.
#                For the "cancer" sample the cost data is loaded regardless.
#
# Returns:
#   If `cost` is TRUE, an unnamed list of three data.tables:
#   (dt_for_exhibits, dt_cost_for_exhibits, dt_cost_for_exhibit_byMainCat);
#   otherwise only dt_for_exhibits.
Build_relevant_dt <- function(sample_name, cost = TRUE) {

  stopifnot(length(sample_name) == 1L)

  key_cols <- c("id_var", "S_index_date_XX", "S_sample_source_XX")
  is_cancer <- sample_name == "cancer"
  with_cost <- cost == TRUE
  # Cost data is always loaded for the cancer sample, otherwise only on request.
  has_cost_data <- is_cancer || with_cost

  # Sum of detailed cost in the window [index date, index date + 365 days].
  sum_cost_after_index <- function(dt) {
    dt[
      difftime(cost_date, S_index_date_XX, units = "days") <= 365 &
        difftime(cost_date, S_index_date_XX, units = "days") >= 0,
      .(UTL_365d_after_indexDate_total_cost = sum(actual_cost, na.rm = TRUE)),
      by = .(id_var, S_index_date_XX, S_sample_source_XX)
    ]
  }

  # TRUE when any non-missing difference is larger than 1 in absolute value.
  exceeds_tolerance <- function(diff) {
    any(abs(diff) > 1, na.rm = TRUE)
  }

  # Look the sample table up by name (plain variable lookup instead of
  # eval(parse())) and work on a copy so the global table is never modified.
  source_dt_name <- paste0("test_dt_withProb_", sample_name)
  source_dt <- get(source_dt_name)
  test_dt_withProb <- data.table::copy(source_dt)

  if (data.table::uniqueN(test_dt_withProb, by = key_cols) !=
      nrow(test_dt_withProb)) {
    stop("Problem with uniqueness in test_dt_withProb")
  }

  # ---- detailed cost data, restricted to the key rows -----------------------
  dt_cost <- NULL
  dt_cost_for_exhibits <- NULL

  if (is_cancer) {
    source(paste0(parent_directory, "get_cost_data_cancer.R"))
    dt_cost <- get_cost_data_cancer()
    # test_dt_withProb is a copy of test_dt_withProb_cancer here.
    key_source <- test_dt_withProb
  } else if (with_cost) {
    source(paste0(parent_directory, "get_cost_data_all.R"))
    dt_cost <- get_cost_data_all()
    # NOTE(review): every non-cancer sample takes its keys from the global
    # test_dt_withProb_all, not from the sample's own table - confirm that
    # this is intended for samples other than "all".
    key_source <- test_dt_withProb_all
  } else {
    print("without cost")
  }

  if (has_cost_data) {
    dt_cost_for_exhibits <- merge(
      x = key_source[, key_cols, with = FALSE],
      y = dt_cost,
      by = key_cols,
      all.x = TRUE,
      all.y = FALSE
    )
  }

  # ---- check detailed cost against the totals in test_dt_withProb -----------
  if (ret_4th_table == TRUE) {
    if (is.null(dt_cost_for_exhibits)) {
      # No cost data was loaded (non-cancer sample with cost = FALSE), so
      # there is nothing to compare against.
      message("Cost data not loaded; skipping cost consistency check")
    } else {
      # Window before the index date: 0 < days before index <= 366.
      cost_before_index <- dt_cost_for_exhibits[
        difftime(S_index_date_XX, cost_date, units = "days") <= 366 &
          difftime(S_index_date_XX, cost_date, units = "days") > 0,
        .(UTL_365d_before_indexDate_total_cost = sum(actual_cost, na.rm = TRUE)),
        by = .(id_var, S_index_date_XX, S_sample_source_XX)
      ]
      cost_after_index <- sum_cost_after_index(dt_cost_for_exhibits)

      dt_cost_detailed_agg <- merge(
        x = cost_before_index,
        y = cost_after_index,
        by = key_cols,
        all = TRUE
      )

      cost_check <- merge(
        x = test_dt_withProb[, .(id_var, S_index_date_XX, S_sample_source_XX,
                                 UTL_f365d_total_cost, UTL_l365d_total_cost)],
        y = dt_cost_detailed_agg,
        by = key_cols,
        all = TRUE
      )
      cost_check[, `:=`(
        diff_l365d = UTL_l365d_total_cost - UTL_365d_before_indexDate_total_cost,
        diff_f365d = UTL_f365d_total_cost - UTL_365d_after_indexDate_total_cost
      )]

      if (exceeds_tolerance(cost_check$diff_f365d) ||
          exceeds_tolerance(cost_check$diff_l365d)) {
        stop("Problem with matching cost!!")
      }
    }
  } else if (with_cost) {
    # Only the "after index date" total is compared in this mode.
    cost_check <- merge(
      x = test_dt_withProb[, .(id_var, S_index_date_XX, S_sample_source_XX,
                               UTL_f365d_total_cost)],
      y = sum_cost_after_index(dt_cost_for_exhibits),
      by = key_cols,
      all = TRUE
    )
    cost_check[, diff_f365d :=
                 UTL_f365d_total_cost - UTL_365d_after_indexDate_total_cost]

    if (exceeds_tolerance(cost_check$diff_f365d)) {
      stop("Problem with matching cost!!")
    }
  }

  # ---- cost aggregated by main category, for the sample's own rows ----------
  if (has_cost_data) {
    cost_by_main_cat <- dt_cost[
      ,
      .(actual_cost = sum(actual_cost, na.rm = TRUE),
        amount = sum(amount, na.rm = TRUE)),
      by = .(id_var, S_index_date_XX, S_sample_source_XX,
             DMG_date_of_death_XX, DMG_died_within_365d, main_cat,
             cost_date, event_date_end)
    ]
    dt_cost_for_exhibit_byMainCat <- merge(
      x = test_dt_withProb[, key_cols, with = FALSE],
      y = cost_by_main_cat,
      by = key_cols,
      all.x = TRUE,
      all.y = FALSE
    )
  } else {
    print("without cost 2 ")
  }

  # ---- sample table with the derived exhibit columns ------------------------
  dt_for_exhibits <- get_test_data(test_dt_withProb, sample_name)

  if (with_cost) {
    return(list(dt_for_exhibits,
                dt_cost_for_exhibits,
                dt_cost_for_exhibit_byMainCat))
  }
  dt_for_exhibits
}


# Add the derived exhibit columns to a sample table.
#
# All columns are added with `:=`, i.e. `data` itself is modified by
# reference and the returned object is that same table.
#
# Columns added:
#   adm_any / adm_any_urg / adm_any_nonUrg - admission indicators built from
#     the UTL_f365d_* count columns,
#   adm_num / adm_len - sums of the count and admission-day columns,
#   backfill_total_cost - UTL_365d_beforeDeath_totalCost where
#     DMG_died_within_365d == "1", else UTL_f365d_total_cost,
#   num_days_lived - 366 where DMG_died_within_365d == "0", else days from
#     index date to date of death plus one,
#   DMG_age_quintiles - quintile bins of DMG_age.
#
# Args:
#   data:        data.table holding the UTL_* and DMG_* columns used below.
#   sample_name: for "cancer" the existing DMG_age_quintiles column is first
#                renamed to DMG_age_quintiles_fullData.
#
# Returns: `data` with the additional columns.
get_test_data <- function(data, sample_name = "cancer") {

  admission_cols <- c("adm_any", "adm_any_urg", "adm_any_nonUrg",
                      "adm_num", "adm_len")

  data[, (admission_cols) := {
    has_urgent <- UTL_f365d_hospUnplanned_count > 0
    has_non_urgent <- UTL_f365d_diff_count > 0 | UTL_f365d_hospPlanned_count > 0
    list(
      has_non_urgent | has_urgent,
      has_urgent,
      has_non_urgent,
      UTL_f365d_diff_count +
        UTL_f365d_hospPlanned_count +
        UTL_f365d_hospUnplanned_count,
      # NOTE(review): "hospUnplanned_admDay" has no trailing "s", unlike
      # "hospPlanned_admDays" - presumably the real column name; confirm.
      UTL_f365d_diff_adm_days +
        UTL_f365d_hospPlanned_admDays +
        UTL_f365d_hospUnplanned_admDay
    )
  }]

  # Cost figure depends on whether the person died within the year.
  data[, backfill_total_cost := dplyr::if_else(DMG_died_within_365d == "1",
                                               UTL_365d_beforeDeath_totalCost,
                                               UTL_f365d_total_cost)]

  data[, num_days_lived := ifelse(
    DMG_died_within_365d == "0",
    366,
    as.numeric(difftime(DMG_date_of_death_XX, S_index_date_XX,
                        units = "days")) + 1
  )]

  # Keep the existing quintile column under a new name before it is
  # recomputed below.
  if (sample_name == "cancer") {
    data.table::setnames(data,
                         old = "DMG_age_quintiles",
                         new = "DMG_age_quintiles_fullData")
  }

  data[, DMG_age_quintiles := gtools::quantcut(DMG_age, seq(0, 1, by = 0.2))]

  return(data)
}
